source('jupyterFunctions_perCellType.R')
CT <- 'myeloid'
CT_label <- 'myeloid'
data_prefix <- paste(sep='','../data/',CT,'/',CT)
ATAC_meta <- readRDS(paste(sep='',data_prefix,'_ATAC_meta.rds'))
chosenPeaks <- readRDS(paste(sep='',data_prefix,'_chosenPeaks.rds'))
snATAC_pxc_norm <- readRDS(paste(sep='',data_prefix,'_snATAC_pxc_norm.rds'))
snRNA_gxc_norm <- readRDS(paste(sep='',data_prefix,'_snRNA_gxc_norm.rds'))
snATAC_pxCT_norm <- readRDS(paste(sep='',data_prefix,'_snATAC_pxCT_norm.rds'))
snRNA_gxCT_norm <- readRDS(paste(sep='',data_prefix,'_snRNA_gxCT_norm.rds'))
chromVARz_mat <- readRDS(paste(sep='',data_prefix,'_ArchR_chromVARz_JASPAR2020.rds'))
ArchR_padj <- readRDS(paste(sep='',data_prefix,'_ArchR_padj_JASPAR2020.rds'))
CITE_meta <- readRDS(paste(sep='',data_prefix,'_CITE_meta.rds'))
class_state_df <- readRDS(paste(sep='',data_prefix,'_class_state_df.rds'))
LDA_res <- readRDS(paste(sep='',data_prefix,'_LDA_stats.rds'))
other_resol <- readRDS(paste(sep='',data_prefix,'_ATAC_otherRes.rds'))
CNA_LD <- readRDS(paste(sep='',data_prefix,'_CNA_lymphoidDensity.rds'))
ATAC_colors <- readRDS('../data/misc/ATAC_class_colors.rds')
CITE_colors <- readRDS('../data/misc/CITE_state_colors.rds')
ATAC_CITE_conv_df <- readRDS('../data/misc/ATAC_CITE_sample_conversion.rds')
save_dir <- NA #'../output/' #or NA if don't want to save
#Fig 4a
options(repr.plot.height=6,repr.plot.width=13.5)
g <- ggplot(ATAC_meta,aes_string(x='UMAP1',y='UMAP2',color='cluster_name')) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_manual(values=ATAC_colors) +
labs(color=paste(sep='','RA ',CT_label,'\nchromatin class')) +
theme(legend.text=element_text(size=22)) +
guides(colour = guide_legend(override.aes = list(size=6,alpha=1)))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_class_UMAP.png'),
plot=g,units='in',height=6,width=13.5,dpi=600)
#Fig S4a
options(repr.plot.height=6,repr.plot.width=13.5)
g <- cellCount_bySample_barPlot(ATAC_meta,'sample','cluster_name',
paste(sep='','RA ',CT_label,'\nchromatin class'),ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_class_cellCount.png'),
plot=g,units='in',height=6,width=13.5,dpi=600)
chosenGenes <- names(chosenPeaks)
chosenPeaks <- chosenPeaks[!is.na(chosenPeaks)] #NA means no peak in gene's promoter
#Fig 4b bottom
genes_forUMAPs <- c('F13A1','LYVE1','SPP1','FCN1','CD1C')
if(!all(genes_forUMAPs %in% names(chosenPeaks))) stop('Genes for UMAP not in chosen genes')
multiome_cells <- rownames(ATAC_meta[which(ATAC_meta$assay=='snATAC'),])
options(repr.plot.height=2,repr.plot.width=9)
g <- plot_markerPeaks_norm_hex_v2(ATAC_meta[multiome_cells,],snRNA_gxc_norm[genes_forUMAPs,multiome_cells],'UMAP1','UMAP2',
plot_genes=genes_forUMAPs,plotCol=length(genes_forUMAPs),
titleSize=16,hex_bins=34,cutCap=0)
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerGene_UMAP.png'),
plot=g,units='in',height=2,width=9,dpi=600)
#Fig 4b top
toPlot <- snATAC_pxc_norm[unname(chosenPeaks[genes_forUMAPs]),multiome_cells]
rownames(toPlot) <- paste(sep='',names(chosenPeaks[genes_forUMAPs]),' peak')
options(repr.plot.height=2,repr.plot.width=9)
g <- plot_markerPeaks_norm_hex_v2(ATAC_meta[multiome_cells,],toPlot,'UMAP1','UMAP2',
plot_genes=rownames(toPlot),plotCol=nrow(toPlot),titleSize=16,hex_bins=34,cutCap=0,
titleFace='plain',colorOpt='plasma')
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerPeak_UMAP.png'),
plot=g,units='in',height=2,width=9,dpi=600)
class_order <- c('MA-0','MA-2','MA-4','MA-1','MA-3')
all(class_order %in% ATAC_meta$cluster_abbr)
#Fig S4b
res <- scaleFeat_forHeatmap(chosenGenes,class_order,chosenPeaks,snRNA_gxCT_norm,snATAC_pxCT_norm)
snRNA_gxCT_norm_subset_scaled <- res$gxCT_norm_subset_scaled
snATAC_pxCT_norm_subset_scaled <- res$pxCT_norm_subset_scaled
fxCT_norm_subset_scaled <- res$fxCT_norm_subset_scaled
scale_lim <- max(abs(snRNA_gxCT_norm_subset_scaled),abs(snATAC_pxCT_norm_subset_scaled),na.rm=TRUE)
options(repr.plot.height=7,repr.plot.width=9)
g <- pseudobulk_scaled_heatmap(snRNA_gxCT_norm_subset_scaled,'Gene',paste('RA',CT_label,'chromatin class'),
'Scaled\nMean\nNormalized\nGene\nExpression',
plotTit=paste('Scaled Mean Normalized Gene Expression\nof multiome cells by RA',
CT_label,'chromatin classes'),
scale_lim=scale_lim,clustColors=ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerGene_heatmap.png'),
plot=g,units='in',height=7,width=9,dpi=600)
g <- pseudobulk_scaled_heatmap(snATAC_pxCT_norm_subset_scaled,'Peak',paste('RA',CT_label,'chromatin class'),
'Scaled\nMean\nNormalized\nPeak\nAccessibility',
plotTit=paste('Scaled Mean Normalized Peak Accessibility\nof multiome cells by RA',
CT_label,'chromatin classes'),
scale_lim=scale_lim,clustColors=ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerPeak_heatmap.png'),
plot=g,units='in',height=7,width=9,dpi=600)
pearR <- cor.test(fxCT_norm_subset_scaled$gene_norm_scale,fxCT_norm_subset_scaled$peak_norm_scale,
method='pearson')
fxCT_norm_subset_scaled$label <- ''
fxCT_norm_subset_scaled[which(fxCT_norm_subset_scaled$gene=='CLEC10A' & fxCT_norm_subset_scaled$cluster_abbr=='MA-3'),
'label'] <- 'CLEC10A'
fxCT_norm_subset_scaled[which(fxCT_norm_subset_scaled$gene=='HBEGF' & fxCT_norm_subset_scaled$cluster_abbr=='MA-0'),
'label'] <- 'HBEGF'
g <- ggplot(fxCT_norm_subset_scaled,
aes_string(x='gene_norm_scale',y='peak_norm_scale',color='cluster_abbr',label='label')) +
geom_point(size=2) + theme_classic(base_size=25) + scale_color_manual(values=ATAC_colors) +
labs(x='Scaled Mean Normalized\nGene Expression',
y='Scaled Mean Normalized\nPeak Accessibility',
color=paste(sep='','RA ',CT_label,'\nchromatin\nclass')) +
geom_abline(slope=1,intercept=0,linetype='dashed') +
ggtitle(paste(sep='','R=',round(pearR$estimate,2),' p-value=',signif(pearR$p.value,3))) +
theme(plot.title=element_text(hjust = 0.5)) + geom_text_repel(box.padding = 0.5,size=6.5,fontface='bold',seed=0)
suppressWarnings(print(g)) #points excluded if peak does not exist
if(!is.na(save_dir)) suppressWarnings(ggsave(file=paste(sep='',save_dir,CT,'_markerGenePeak_scatterplot.png'),
plot=g,units='in',height=7,width=9,dpi=600))
#Fig 4c left
#fix cell names
split1 <- str_split_fixed(colnames(chromVARz_mat),'#',2)
new_colnames <- paste(sep='',split1[,1],'_',str_split_fixed(split1[,2],'-',2)[,1])
if(!identical(sort(new_colnames),sort(rownames(ATAC_meta)))) stop('cellnames not consistent b/t ATAC_meta and chromVAR')
colnames(chromVARz_mat) <- new_colnames
motif_toPlot <- 'KLF4_587'
toPlot <- cbind(ATAC_meta,'motif'=chromVARz_mat[motif_toPlot,rownames(ATAC_meta)])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot,aes_string(x='UMAP1',y='UMAP2',color='motif')) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_gradient2(low='blue',mid='white',high='red',midpoint=0) +
labs(color=paste(sep='',str_split_fixed(motif_toPlot,'_',2)[,1],'\nchromVAR\ndeviation'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_motif_',motif_toPlot,'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig 4c right
#add hyphen back
if(!identical(sort(colnames(ArchR_padj)),sort(colnames(snRNA_gxCT_norm))) &
all(str_detect(colnames(ArchR_padj),'^[a-zA-Z]{2}[0-9]+$'))){
colnames(ArchR_padj) <- lapply(colnames(ArchR_padj),FUN=function(s){paste(sep='',substr(s,1,2),'-',
substr(s,3,nchar(s)))})
}
if(!identical(sort(colnames(ArchR_padj)),
sort(colnames(snRNA_gxCT_norm)))) stop('mxCT and gxCT matrices do not have same CT.')
options(repr.plot.height=6,repr.plot.width=12)
g <- ArchR_topMotifs_KWspin(ArchR_padj,snRNA_gxCT_norm,cOrd=class_order,cColors=ATAC_colors,
minE=2,num_mot=8,minGE=0.05,withinE=0.95,
mLab='Transcription Factor Motif',cLab=paste(sep='','RA ',CT_label,'\nchromatin class'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_motif_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
#Fig 4d left
gene_toPlot <- 'C1QB'
toPlot <- cbind(ATAC_meta[multiome_cells,],'gene'=snRNA_gxc_norm[gene_toPlot,multiome_cells])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot[order(toPlot$gene),],aes_string(x='UMAP1',y='UMAP2',color='gene')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_viridis(option = 'viridis') +
labs(color=paste(sep='',gene_toPlot,'\nNormalized\nGene\nExpression'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_gene_',gene_toPlot,'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig S8c
options(repr.plot.height=6,repr.plot.width=8)
g <- symp_prop_df(ATAC_meta[multiome_cells,],CITE_meta,
paste(sep='','Multiome ',CT_label,' cell state proportions\ninferred from snRNA (n=',
length(unique(ATAC_meta[multiome_cells,'sample'])),')'),
paste(sep='','AMP-RA ',CT_label,'\ncell state proportions (n=',
length(unique(CITE_meta$sample)),')'),
paste(sep='','AMP-RA\n',CT_label,'\ncell state'),clustColors=CITE_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_CITE_state_prop.png'),
plot=g,units='in',height=6,width=8,dpi=600)
#Fig 7c left
options(repr.plot.height=6,repr.plot.width=6)
g <- ggplot(ATAC_meta[multiome_cells,],aes_string(x='UMAP1',y='UMAP2',color='CITE')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_manual(values=CITE_colors) +
theme(legend.position="none")
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_snATAC_state_UMAP.png'),
plot=g,units='in',height=6,width=6,dpi=600)
#setting order
class_conv_df <- unique(ATAC_meta[,c('cluster_name','cluster_abbr')])
rownames(class_conv_df) <- class_conv_df$cluster_abbr
full_class_order <- class_conv_df[class_order,'cluster_name']
class_state_df$class <- factor(class_state_df$class,levels=class_order)
state_order <- class_state_df[order(class_state_df$class,class_state_df$intOrd),'state']
state_conv_df <- unique(ATAC_meta[,c('CITE','CITE_abbr')])
rownames(state_conv_df) <- state_conv_df$CITE_abbr
full_state_order <- state_conv_df[state_order,'CITE']
#Fig 7c right
#only 2 M-13 cells, so excluding here
fisher_df <- calc_OR(ATAC_meta[which(ATAC_meta$assay=='snATAC' & ATAC_meta$CITE_abbr!='M-13'),], 'cluster_name', 'CITE')
write.table(fisher_df[,c('cluster_name','CITE','OR','pval','padj','CI_low','CI_high')],
file=paste(sep='',save_dir,CT,'_class_state_OR_table.txt'),quote=FALSE,sep='\t',row.names=FALSE)
g <- plot_OR(fisher_df, 'cluster_name', 'CITE',
paste('RA',CT_label,'chromatin class'), paste('AMP-RA',CT_label,'cell state'),
full_class_order, full_state_order,
clustColors=c(ATAC_colors,CITE_colors))
options(repr.plot.height=6,repr.plot.width=12)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_class_state_OR_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
#Fig S11c
options(repr.plot.height=10,repr.plot.width=12)
g <- LDA_plots(LDA_res,CT,paste('AMP-RA',CT_label,'cell state'),
class_state_df=class_state_df,ctOrd_col='intOrd',ctOrd=class_order,
clustColors=c(CITE_colors,ATAC_colors))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_LDA_heatmap.png'),
plot=g,units='in',height=10,width=12,dpi=600)
#Fig S9c left
gene_toPlot <- 'SPP1'
toPlot <- cbind(ATAC_meta[multiome_cells,],'gene'=snRNA_gxc_norm[gene_toPlot,multiome_cells])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot[order(toPlot$gene),],aes_string(x='UMAP1',y='UMAP2',color='gene')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_viridis(option = 'viridis') +
labs(color=paste(sep='',gene_toPlot,'\nNormalized\nGene\nExpression'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_gene_',gene_toPlot,'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig S10c top
if(!identical(sort(rownames(other_resol)),sort(rownames(ATAC_meta)))) stop('rownames need to be the same')
toPlot <- cbind(ATAC_meta,other_resol[rownames(ATAC_meta),])
for(cc in colnames(other_resol)){
cluster_colors <- hue_pal()(length(unique(toPlot[,cc])))
names(cluster_colors) <- sort(unique(toPlot[,cc]))
options(repr.plot.height=6,repr.plot.width=8)
g <- ggplot(toPlot[multiome_cells,],aes_string(x='UMAP1',y='UMAP2',color=cc)) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=20) + scale_color_manual(values=cluster_colors) +
guides(colour = guide_legend(override.aes = list(size=5,alpha=1)))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_',cc,'_UMAP.png'),
plot=g,units='in',height=6,width=8,dpi=600)
}
#Fig S10c bottom
if(!identical(sort(rownames(other_resol)),sort(rownames(ATAC_meta)))) stop('rownames need to be the same')
toPlot <- cbind(ATAC_meta,other_resol[rownames(ATAC_meta),])
for(cc in colnames(other_resol)){
cluster_colors <- hue_pal()(length(unique(toPlot[,cc])))
names(cluster_colors) <- sort(unique(toPlot[,cc]))
fisher_df <- calc_OR(toPlot[which(toPlot$assay=='snATAC' & toPlot$CITE_abbr!='M-13'),], cc, 'CITE')
clustOrd <- reorder_col_diag_plotOR(fisher_df,cc,'CITE',yOrd=full_state_order,mCol='lnOR',op='max')
g <- plot_OR(fisher_df, cc, 'CITE',
paste('RA',CT_label,'chromatin cluster\nat resolution',str_split_fixed(cc,'_',2)[,2]),
paste('AMP-RA',CT_label,'cell state'),
clustOrd, full_state_order,
clustColors=c(cluster_colors,CITE_colors))
options(repr.plot.height=6,repr.plot.width=12)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',cc,'_state_OR_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
}
#Fig S12a
tVec <- lapply(sort(unique(ATAC_meta$cluster_name)),replace_space_newline_afterHalf,wiggle=4)
names(tVec) <- sort(unique(ATAC_meta$cluster_name))
options(repr.plot.height=4.25,repr.plot.width=4*length(unique(ATAC_meta$cluster_abbr)))
g <- donor_prop_comp_plot(ATAC_CITE_conv_df,ATAC_meta[which(ATAC_meta$assay=='scATAC'),],CITE_meta,
clustColors=ATAC_colors,tSize=18,tVec=tVec)
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_CITE_donor_prop.png'),
plot=g,units='in',height=4.25,width=4*length(unique(ATAC_meta$cluster_abbr)),dpi=600)
#Fig 8b
CNA_LD_FDR <- 0.1
CNA_LD_globalp <- 0.005
CNA_title <- 'Lymphoid Density'
CNA_col <- 'cna_corr_dens'
class_col <- 'ATAC_cluster_name'
toPlot <- CNA_add_col(CITE_meta, CNA_LD, CNA_col)
options(repr.plot.height=6,repr.plot.width=7)
g <- CNA_umap_plots(toPlot,'ATAC_UMAP1','ATAC_UMAP2',CNA_col,
thisTitle=paste(CNA_title,'CNA correlations\n','p =',CNA_LD_globalp),
smallPt=TRUE,fdr_thresh=CNA_LD_FDR)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',str_replace(CNA_title,' ','_'),'_CNA_UMAP.png'),
plot=g,units='in',height=6,width=7,dpi=600)
#only plot those classes whose median is above FDR thresholds
ll <- aggregate(toPlot[,CNA_col] ~ toPlot[,class_col], FUN=median)
colnames(ll) <- c(class_col,CNA_col)
CNA_classes <- ll[which(abs(ll[,CNA_col])>CNA_LD_FDR),class_col]
options(repr.plot.height=4,repr.plot.width=8)
g <- CNA_violin_plots(toPlot[which(toPlot[,class_col] %in% CNA_classes),],
class_col,CNA_col,CNA_LD_FDR,thisTitle=paste(CNA_title,'CNA'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',str_replace(CNA_title,' ','_'),'_CNA_violin.png'),
plot=g,units='in',height=4,width=8,dpi=600)
sessionInfo()